library(dtwclust)
## Loading required package: proxy
##
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
## Loading required package: dtw
## Loaded dtw v1.23-1. See ?dtw for help, citation("dtw") for use in publication.
## dtwclust:
## Setting random number generator to L'Ecuyer-CMRG (see RNGkind()).
## To read the included vignettes type: browseVignettes("dtwclust").
## See news(package = "dtwclust") after package updates.
library(dtw)
df<-read.csv("../clean_data/score_list_year.csv")
df
df <- subset(df, select = -c(X) )
df
#colnames(df)[colnames(df) == "X2000"] = "2000"
colnames(df)[colnames(df) == "X2003"] = "2003"
colnames(df)[colnames(df) == "X2005"] = "2005"
colnames(df)[colnames(df) == "X2007"] = "2007"
colnames(df)[colnames(df) == "X2009"] = "2009"
colnames(df)[colnames(df) == "X2011"] = "2011"
colnames(df)[colnames(df) == "X2013"] = "2013"
colnames(df)[colnames(df) == "X2015"] = "2015"
colnames(df)[colnames(df) == "X2017"] = "2017"
colnames(df)[colnames(df) == "X2019"] = "2019"
colnames(df)[colnames(df) == "X2022"] = "2022"
df
jurisdiction = df[['Jurisdiction']]
dtw_df <- df[, -1]
head(dtw_df)
df_lst <- tslist(dtw_df)
remove_nan <- function(ts) {
ts[!is.na(ts)]
}
# Apply the function to each time series in the list
df_lst <- lapply(df_lst, remove_nan)
head(df_lst)
## $`1`
## [1] 230 230 232 231 233 231 228 229 225 223
##
## $`2`
## [1] 226 232 230 236 236 236 237 237 236 233
##
## $`3`
## [1] 232 238 234 238 240 235 230 232 230 229
##
## $`4`
## [1] 228 233 234 235 240 238 238 238 236 229
##
## $`5`
## [1] 230 235 232 232 234 234 232 230 230 227
##
## $`6`
## [1] 236 242 241 242 247 244 243 240 239 235
df_cvi <- list()
for (i in 2:10){
df_clust <- tsclust(df_lst, type = "partitional", k = i, distance = "dtw_basic", centroid = "pam")
df_metric <- cvi(df_clust, type = "valid", log.base = 10)
df_cvi <- append(df_cvi, list(df_metric))
}
df_cvi_ma <- do.call(rbind, df_cvi)
rw <- c("K2","K3","K4","K5","K6","K7","K8","K9","K10")
rownames(df_cvi_ma) <- rw
print(df_cvi_ma)
## Sil SF CH DB DBstar D COP
## K2 0.5327527 0 43.28736 0.9210059 0.9210059 0.08888889 0.23987430
## K3 0.3863705 0 28.30209 0.7932507 0.9258264 0.08888889 0.21514668
## K4 0.1295117 0 22.97979 1.6732991 1.8801323 0.08181818 0.16686699
## K5 0.2361482 0 24.91268 1.0158369 1.1395415 0.13888889 0.14873193
## K6 0.1976977 0 15.29492 1.3165721 1.6292335 0.13513514 0.14320138
## K7 0.2262670 0 16.60267 1.1701042 1.2959175 0.17241379 0.10868303
## K8 0.1497312 0 12.65337 1.5293468 1.8700981 0.22448980 0.11383528
## K9 0.1015162 0 12.06658 1.0970361 1.4288178 0.15873016 0.11301479
## K10 0.1926633 0 11.76796 1.4252631 1.6888127 0.22727273 0.09079714
– “Sil” (!): Silhouette index (Rousseeuw (1987); to be maximized).-K4 – “SF” (~): Score Function (Saitta et al. (2007); to be maximized; see notes). – “CH” (~): Calinski-Harabasz index (Arbelaitz et al. (2013); to be maximized).-k3 – “DB” (?): Davies-Bouldin index (Arbelaitz et al. (2013); to be minimized).k4 – “DBstar” (?): Modified Davies-Bouldin index (DB*) (Kim and Ramakrishna (2005); to be minimized). -k4 – “D” (!): Dunn index (Arbelaitz et al. (2013); to be maximized). k5 – “COP” (!): COP index (Arbelaitz et al. (2013); to be minimized). k9
# different seeds
df_cvi2 <- list()
for (i in 1:100){
df_clust2 <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw_basic", centroid = "pam", seed=i)
df_metric2 <- cvi(df_clust2, type = "valid", log.base = 10)
df_cvi2 <- append(df_cvi2, list(df_metric2))
}
df_cvi_ma2 <- do.call(rbind, df_cvi2)
rw2 <- as.character(seq(1, 100))
rownames(df_cvi_ma2) <- rw2
print(df_cvi_ma2)
## Sil SF CH DB DBstar D COP
## 1 0.1874888 0 18.25596 1.3053106 1.8287413 0.08396947 0.1917782
## 2 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
## 3 0.3024563 0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
## 4 0.1812487 0 26.39508 1.6927962 2.0586129 0.10000000 0.1595358
## 5 0.1923599 0 19.30357 1.2095514 1.6587284 0.08396947 0.1921932
## 6 0.2507965 0 19.95531 1.3104726 1.7536605 0.08148148 0.2070441
## 7 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 8 0.2142639 0 22.92645 1.3688596 1.4739257 0.14864865 0.1638778
## 9 0.2311142 0 23.73130 1.1148819 1.1888411 0.13333333 0.1651468
## 10 0.3765237 0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
## 11 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
## 12 0.2993477 0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
## 13 0.2830807 0 19.19311 1.3461046 1.8551322 0.08888889 0.2064483
## 14 0.2061031 0 18.53913 1.3828375 1.8650863 0.10714286 0.1775404
## 15 0.1440264 0 20.38992 1.8710270 2.1120158 0.09803922 0.1776847
## 16 0.2061031 0 18.53913 1.3828375 1.8650863 0.10714286 0.1775404
## 17 0.3765237 0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
## 18 0.2756091 0 20.92445 1.4813641 1.5843049 0.08888889 0.1952426
## 19 0.3765237 0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
## 20 0.3024563 0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
## 21 0.1332506 0 15.41930 1.4563920 1.5887485 0.08888889 0.2194493
## 22 0.1975725 0 21.11861 1.3645201 1.5256517 0.13750000 0.1576086
## 23 0.3073942 0 26.64271 0.8748602 0.9951907 0.14705882 0.1671014
## 24 0.2993477 0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
## 25 0.2556192 0 20.88889 1.8150376 2.0616855 0.08888889 0.1917037
## 26 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 27 0.2643667 0 19.45326 1.4257934 1.8834036 0.08888889 0.2052309
## 28 0.3024563 0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
## 29 0.2359621 0 17.80887 1.0671383 1.3250603 0.08928571 0.1846214
## 30 0.2643303 0 23.68913 1.5044179 1.6082708 0.14634146 0.1623024
## 31 0.2198519 0 18.46799 1.3344687 1.8283955 0.08148148 0.2023604
## 32 0.1923599 0 19.30357 1.2095514 1.6587284 0.08396947 0.1921932
## 33 0.2909841 0 22.44752 1.0798717 1.2153745 0.14492754 0.1547814
## 34 0.2611797 0 24.10611 1.5499854 1.5889159 0.10989011 0.1684780
## 35 0.3907959 0 24.86772 1.0765682 1.2718168 0.14634146 0.1685981
## 36 0.2634083 0 22.70326 1.2548293 1.3077921 0.13513514 0.1621190
## 37 0.1680235 0 18.82338 1.2568437 1.7326249 0.08148148 0.2077470
## 38 0.2403821 0 21.53393 1.3588650 1.4736465 0.13513514 0.1677886
## 39 0.2379958 0 23.99054 1.2376829 1.3529575 0.13513514 0.1704521
## 40 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 41 0.2302802 0 17.49444 1.2092021 1.5823800 0.07633588 0.1926561
## 42 0.2102223 0 18.20317 1.2852829 1.5978954 0.11764706 0.1790028
## 43 0.2710659 0 22.93085 1.6051506 1.6544439 0.14285714 0.1740123
## 44 0.2143036 0 26.28056 1.1814812 1.2643829 0.11842105 0.1468198
## 45 0.3076958 0 24.30530 1.0263258 1.1141560 0.14492754 0.1529607
## 46 0.2284966 0 21.51658 1.7707353 1.8736810 0.14285714 0.1662717
## 47 0.2798071 0 22.39459 1.2381613 1.4589752 0.14492754 0.1512425
## 48 0.1955145 0 28.86993 1.3117774 1.6050695 0.10000000 0.1582702
## 49 0.3073942 0 26.64271 0.8748602 0.9951907 0.14705882 0.1671014
## 50 0.3256208 0 31.14627 0.9078543 0.9346023 0.18055556 0.1547116
## 51 0.2798071 0 22.39459 1.2381613 1.4589752 0.14492754 0.1512425
## 52 0.1941546 0 14.12701 2.5014473 2.8671010 0.08396947 0.2068573
## 53 0.3765237 0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
## 54 0.2233182 0 26.66568 1.1781803 1.2631824 0.11842105 0.1467950
## 55 0.2993477 0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
## 56 0.1911848 0 23.52348 1.8284677 1.9798721 0.12195122 0.1617180
## 57 0.2128360 0 22.79792 1.9260579 2.0171403 0.13414634 0.1536275
## 58 0.2168573 0 18.38749 1.4443697 2.0629950 0.08148148 0.1996360
## 59 0.2823329 0 19.36654 1.2757070 1.7666756 0.08888889 0.2064461
## 60 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 61 0.2993477 0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
## 62 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
## 63 0.1354536 0 14.17592 1.9028523 2.3671551 0.08928571 0.1945729
## 64 0.3073942 0 26.64271 0.8748602 0.9951907 0.14705882 0.1671014
## 65 0.2909841 0 22.44752 1.0798717 1.2153745 0.14492754 0.1547814
## 66 0.3747658 0 30.45188 0.6940726 0.7647676 0.18644068 0.1538688
## 67 0.1900115 0 18.98990 1.1484432 1.1689030 0.13513514 0.1558790
## 68 0.1170078 0 18.68287 1.1717777 1.6333408 0.07633588 0.2021902
## 69 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
## 70 0.2253970 0 21.77116 1.6244867 1.7485403 0.10989011 0.1609345
## 71 0.2628665 0 20.31148 1.4623198 1.5763948 0.15873016 0.1568197
## 72 0.2879393 0 20.40829 0.9871044 1.4338029 0.08888889 0.2050737
## 73 0.3612311 0 23.06653 1.3378768 1.4198228 0.14634146 0.1673015
## 74 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 75 0.2456433 0 22.39671 1.3276921 1.4104741 0.12162162 0.1605648
## 76 0.2078312 0 18.82915 1.6932078 2.2658465 0.08396947 0.1958815
## 77 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 78 0.2382307 0 20.72286 1.7611842 2.0391447 0.08888889 0.1929157
## 79 0.2094742 0 23.02502 0.9644303 1.0667018 0.12195122 0.1645362
## 80 0.1932313 0 18.46895 1.3260993 1.7516665 0.08928571 0.1809625
## 81 0.2403821 0 21.53393 1.3588650 1.4736465 0.13513514 0.1677886
## 82 0.2118094 0 22.29318 0.9943062 1.1051276 0.11764706 0.1815512
## 83 0.3801553 0 24.84512 1.2020464 1.2857002 0.13414634 0.1644943
## 84 0.3765237 0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
## 85 0.2904063 0 24.23261 1.1792735 1.3501068 0.14492754 0.1494218
## 86 0.3024563 0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
## 87 0.2756091 0 20.92445 1.4813641 1.5843049 0.08888889 0.1952426
## 88 0.1984354 0 29.00511 1.3175280 1.6084602 0.10000000 0.1584127
## 89 0.3612311 0 23.06653 1.3378768 1.4198228 0.14634146 0.1673015
## 90 0.2391533 0 20.68884 1.8402949 1.9491779 0.13513514 0.1618730
## 91 0.2750848 0 22.27727 1.2633329 1.3011376 0.13513514 0.1618074
## 92 0.2634083 0 22.70326 1.2548293 1.3077921 0.13513514 0.1621190
## 93 0.3582754 0 28.67233 0.6869252 0.7535149 0.13750000 0.1549651
## 94 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
## 95 0.2094742 0 23.02502 0.9644303 1.0667018 0.12195122 0.1645362
## 96 0.3546810 0 25.16784 1.0903469 1.1321172 0.10975610 0.1633902
## 97 0.3656055 0 24.86673 1.1657459 1.2349088 0.14634146 0.1654051
## 98 0.1932916 0 17.70319 1.3395919 1.8360966 0.08396947 0.1921884
## 99 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
## 100 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
for (i in 2:10){df_clust_opt <- tsclust(df_lst, type = "partitional", k = i, distance = "dtw", centroid = "pam",seed = 700)
plot(df_clust_opt)}
for (i in 1:10){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}
for (i in 11:20){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}
for (i in 21:30){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}
#k4
df_clust_opt_final <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = 700)
plot(df_clust_opt_final)
# Extract cluster assignments
cluster_assignments <- df_clust_opt_final@cluster
# Determine the number of clusters
num_clusters <- max(cluster_assignments)
# Loop through each cluster and print the jurisdictions in it
for (cluster_number in 1:num_clusters) {
cat("Jurisdictions in Cluster", cluster_number, ":\n")
# Find the indices of jurisdictions in this cluster
indices_in_cluster <- which(cluster_assignments == cluster_number)
# Print the jurisdictions corresponding to these indices
print(jurisdiction[indices_in_cluster])
cat("\n") # Add a newline for readability
}
## Jurisdictions in Cluster 1 :
## [1] "Alaska" "Delaware" "Illinois" "Michigan"
## [5] "New York" "Oklahoma" "Oregon" "South Carolina"
## [9] "West Virginia"
##
## Jurisdictions in Cluster 2 :
## [1] "Connecticut" "Florida" "Indiana" "Iowa"
## [5] "Massachusetts" "Minnesota" "Montana" "Nebraska"
## [9] "New Hampshire" "New Jersey" "North Dakota" "Pennsylvania"
## [13] "South Dakota" "Texas" "Utah" "Virginia"
## [17] "Wisconsin" "Wyoming"
##
## Jurisdictions in Cluster 3 :
## [1] "Alabama" "Arizona" "Arkansas" "California" "Georgia"
## [6] "Hawaii" "Kentucky" "Louisiana" "Mississippi" "Missouri"
## [11] "Nevada" "New Mexico" "Rhode Island" "Tennessee"
##
## Jurisdictions in Cluster 4 :
## [1] "Colorado" "Idaho" "Kansas" "Maine"
## [5] "Maryland" "National" "North Carolina" "Ohio"
## [9] "Vermont" "Washington"
# Load necessary libraries
library(ggplot2)
library(reshape2)
# Loop through each cluster
for (cluster_number in 1:num_clusters) {
cat("Plotting jurisdictions in Cluster", cluster_number, ":\n")
# Find the indices of jurisdictions in this cluster
indices_in_cluster <- which(cluster_assignments == cluster_number)
# Get the names of the jurisdictions in this cluster
jurisdictions_in_cluster <- jurisdiction[indices_in_cluster]
# Filter the gap_list_year data frame for these jurisdictions
cluster_data <- df[df$Jurisdiction %in% jurisdictions_in_cluster, ]
# Convert the data to long format for ggplot
long_df <- melt(cluster_data, id.vars = "Jurisdiction", variable.name = "Year", value.name = "Value")
# Plot
p <- ggplot(long_df, aes(x = Year, y = Value, group = Jurisdiction, color = Jurisdiction)) +
geom_line() +
labs(title = paste("Cluster", cluster_number), x = "Year", y = "Scale Score") +
theme(legend.position = "right")
print(p)
#ggsave(paste("cluster_", cluster_number, ".png", sep=""), plot = p)
}
## Plotting jurisdictions in Cluster 1 :
## Plotting jurisdictions in Cluster 2 :
## Plotting jurisdictions in Cluster 3 :
## Plotting jurisdictions in Cluster 4 :